import xgboost as xgb
import shap
import numpy as np
import pandas as pd
import matplotlib.pylab as pl
# Initialise shap's JavaScript plotting support (used by the force plots below
# when this runs inside a notebook).
shap.initjs()

# Load the XGBoost model (trained in R) from its saved model file.
rmodel = xgb.Booster(model_file='Master.XGBModel')

# Read the data set.
qsprdf = pd.read_csv("xgbpython.csv")

# Split the frame: tracer identifier column, prediction target (Y), and the
# remaining columns as the feature matrix (X and qspr are the same object).
tracer = qsprdf['Tracer']
Y = qsprdf['Tracer_Conc']
X = qspr = qsprdf.drop(columns=['Tracer', 'Tracer_Conc'])

# DMatrix built from the features with the target as label.
d_test = xgb.DMatrix(X, label=Y)

# Tree explainer for the loaded model; calling it on X yields the SHAP values
# consumed by the waterfall and force plots.
explainer = shap.TreeExplainer(rmodel)
shap_values = explainer(X)
# Force plots covering the entire breakthrough curve (BTC) of each tracer.
# Every call passes one window of shap_values rows and labels it via out_names.
#
# NOTE(review): the windows start 4000 rows apart, so each tracer presumably
# occupies 4000 consecutive rows -- confirm against xgbpython.csv. Python
# slices exclude the end index, so each end bound is the start of the next
# window (e.g. 56000:60000); an end bound of 59999 would leave out row 59999.
#
# shap emits this notice when plotting windows of this size:
#   "shap.plots.force is slow for many thousands of rows, try subsampling your data."
shap.force_plot(shap_values[56000:60000, :], out_names='tert-Butylbenzene')  # tert-Butylbenzene
shap.force_plot(shap_values[60000:64000, :], out_names='tert-Butylbenzene(2)')  # tert-Butylbenzene(2)
shap.force_plot(shap_values[64000:68000, :], out_names='1,2,4,5-Tetramethylbenzene')  # 1,2,4,5-Tetramethylbenzene
shap.force_plot(shap_values[68000:72000, :], out_names='Pentamethylbenzene')  # Pentamethylbenzene
shap.force_plot(shap_values[72000:76000, :], out_names='Pentamethylbenzene(2)')  # Pentamethylbenzene(2)
import os
# Convert the notebook to HTML by shelling out to `jupyter nbconvert`.
# NOTE(review): 'yourNotebook.ipynb' looks like a placeholder file name --
# confirm it matches the actual notebook.
# os.system returns the command's exit status; a non-zero value means the
# conversion did not succeed, so report it instead of discarding it silently.
exit_status = os.system('jupyter nbconvert --to html yourNotebook.ipynb')
if exit_status != 0:
    print('jupyter nbconvert failed with exit status', exit_status)